Redact(string,string,IList<PDFRedact>) Method

Summary

Redacts a PDF document stored in a file.

Syntax

C++/CLI

Python

public static void Redact( 
   string fileName, 
   string password, 
   IList<PDFRedact> redacts 
)

public:  
   static void Redact( 
      String^ fileName, 
      String^ password, 
      IList<PDFRedact^>^ redacts 
   )

def Redact(self,fileName,password,redacts):

Parameters

fileName

Name of the file containing an existing PDF document to be redacted.

password

The password to use if fileName contains an encrypted PDF file.

redacts

One or more PDF redact objects.

Remarks

Redaction can be used to remove sensitive information from an existing PDF document.

This method quickly redacts an existing PDF document in place by removing any character, image, or shape that intersects with any of the PDFRedact.Bounds of redacts. The resulting PDF is not re-generated and therefore will maintain the same exact compression, metadata, fonts and any other resources.

Use the following code to redact all data in the area from (0, 0) to (100, 100) of a PDF page:

// Create a PDF redaction object for the 0, 0 to 100, 100 area 
var redact = new PDFRedact(0, 0, 100, 100); 
// Redact the file in place. The second argument is the password; 
// null is passed here on the assumption that the file is not encrypted 
PDFFile.Redact(pdfFileName, null, new List<PDFRedact> { redact });

Example

This example parses the text of a PDF file, finds the locations of all items containing the word "LEADTOOLS", and redacts them.

Java

using Leadtools.WinForms; 
using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Controls; 
using Leadtools.Drawing; 
using Leadtools.ImageProcessing; 
using Leadtools.Pdf; 
using Leadtools.Svg; 
 
 
private static void RedactExample()
{
   const string toRedact = "LEADTOOLS";
   // All matching below is done on lower-cased (invariant) text
   string toRedactLower = toRedact.ToLowerInvariant();

   // Work on a copy of 'leadtools.pdf' (installed with LEADTOOLS) because Redact modifies the file in place
   string imagesDir = @"C:\LEADTOOLS22\Resources\Images";
   string originalFileName = Path.Combine(imagesDir, "leadtools.pdf");
   string pdfFileName = Path.Combine(imagesDir, "leadtools-redacted.pdf");
   File.Copy(originalFileName, pdfFileName, true);

   // Step 1: use PDFDocument to collect every word (text + position) of every page
   var allWords = new List<MyPDFWord>();
   using (var sourceDocument = new PDFDocument(pdfFileName))
   {
      sourceDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1);

      foreach (PDFDocumentPage page in sourceDocument.Pages)
      {
         allWords.AddRange(GetPageWords(page));
      }
   }

   // Step 2: build one PDFRedact per word that contains the value we want to redact
   var pdfRedacts = new List<PDFRedact>();
   foreach (MyPDFWord candidate in allWords)
   {
      if (!candidate.Value.ToLowerInvariant().Contains(toRedactLower))
      {
         continue;
      }

      Console.WriteLine($"Found {candidate.Value} at {candidate.Bounds} in page {candidate.PageNumber}");
      LeadRectD area = candidate.Bounds;
      pdfRedacts.Add(new PDFRedact
      {
         PageNumber = candidate.PageNumber,
         Bounds = new PDFRect(area.Left, area.Top, area.Right, area.Bottom)
      });
   }

   // Step 3: redact the document (null password)
   PDFFile.Redact(pdfFileName, null, pdfRedacts);

   // Step 4: re-parse the redacted file and assert that none of its words still contain the value
   using (var redactedDocument = new PDFDocument(pdfFileName))
   {
      redactedDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1);

      foreach (PDFDocumentPage page in redactedDocument.Pages)
      {
         foreach (MyPDFWord remaining in GetPageWords(page))
         {
            Debug.Assert(!remaining.Value.ToLowerInvariant().Contains(toRedactLower));
         }
      }
   }
}
 
// Simple holder describing one word extracted from a PDF page 
class MyPDFWord 
{ 
   // Number of the page the word was found on (copied from PDFDocumentPage.PageNumber) 
   public int PageNumber; 
   // The text of the word, built from the Code of each text object that makes up the word 
   public string Value; 
   // Bounding rectangle of the word: the union of the bounds of its text objects. 
   // GetPageWords stores it with Y flipped (pageHeight - y), i.e. relative to a 
   // top-left origin rather than the bottom-left origin of the PDF object bounds 
   public LeadRectD Bounds; 
} 
 
// Groups the text objects of a parsed PDF page into words.
//
// pdfPage: a page whose Objects have been parsed (PDFParsePagesOptions.Objects).
// Returns: one MyPDFWord per word, holding the page number, the word text and the
//          union of the character rectangles with Y flipped to a top-left origin.
//          An empty list is returned when the page has no objects.
//
// A word ends at the first object flagged IsEndOfWord or IsEndOfLine, or at the last
// object of the page. Non-text objects are ignored: they contribute neither text nor bounds.
private static IList<MyPDFWord> GetPageWords(PDFDocumentPage pdfPage)
{
   var words = new List<MyPDFWord>();

   IList<PDFObject> objects = pdfPage.Objects;
   if (objects == null || objects.Count == 0)
      return words;

   int objectIndex = 0;
   int objectCount = objects.Count;

   double pageHeight = pdfPage.Height;

   // Loop through all the objects, consuming one word per iteration
   while (objectIndex < objectCount)
   {
      // Text and bounding rectangle are accumulated together so they always
      // describe exactly the same set of text objects
      LeadRectD wordBounds = LeadRectD.Empty;
      var sb = new StringBuilder();

      // Loop till we reach EndOfWord/EndOfLine or reach the end of the objects
      bool more = true;
      while (more)
      {
         PDFObject pdfObject = objects[objectIndex];
         // Is it text?
         if (pdfObject.ObjectType == PDFObjectType.Text)
         {
            PDFRect pdfBounds = pdfObject.Bounds;

            // pdfBounds are in bottom-left coordinate, convert it to top-left
            LeadRectD objectBounds = LeadRectD.FromLTRB(pdfBounds.Left, pageHeight - pdfBounds.Top, pdfBounds.Right, pageHeight - pdfBounds.Bottom);

            // Add the bounding rectangle of this object
            if (wordBounds.IsEmpty)
               wordBounds = objectBounds;
            else
               wordBounds = LeadRectD.UnionRects(wordBounds, objectBounds);

            // Add the character of this object
            sb.Append(pdfObject.Code);
         }

         objectIndex++;
         more = (objectIndex < objectCount) && !pdfObject.TextProperties.IsEndOfWord && !pdfObject.TextProperties.IsEndOfLine;
      }

      // Only non-text objects were consumed: nothing to report
      if (sb.Length == 0)
      {
         continue;
      }

      // Add this word to the list
      var word = new MyPDFWord();
      word.PageNumber = pdfPage.PageNumber;
      word.Value = sb.ToString();
      word.Bounds = wordBounds;
      words.Add(word);
   }

   return words;
}

 
import java.io.ByteArrayInputStream; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.FileOutputStream; 
import java.io.FileReader; 
import java.io.IOException; 
import java.io.InputStream; 
import java.nio.file.Files; 
import java.nio.file.Paths; 
import java.nio.file.StandardCopyOption; 
import java.util.ArrayList; 
import java.util.Date; 
import java.util.List; 
import java.util.Scanner; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
import static org.junit.Assert.*; 
 
import leadtools.*; 
import leadtools.codecs.*; 
import leadtools.pdf.*; 
 
 
// Redacts every word containing "LEADTOOLS" in a copy of 'leadtools.pdf', then re-parses
// the redacted file and asserts that no remaining word contains that value.
// Throws IOException if the working copy of the PDF cannot be created.
public void pdfFileRedactExample() throws IOException { 
 
   final String toRedact = "LEADTOOLS"; 
 
   // Make a copy of 'leadtools.pdf' installed with LEADTOOLS; redact modifies the file in place 
   final String LEAD_VARS_IMAGES_DIR = "C:\\LEADTOOLS23\\Resources\\Images"; 
   String pdfFileName = combine(LEAD_VARS_IMAGES_DIR, "leadtools-redacted.pdf"); 
   Files.copy(Paths.get(combine(LEAD_VARS_IMAGES_DIR, "leadtools.pdf")), Paths.get(pdfFileName), 
         StandardCopyOption.REPLACE_EXISTING); 
 
   // We will use PDFDocument to find the position of the words to redact 
 
   // Collect the words of every page in the document 
   ArrayList<MyPDFWord> allWords = new ArrayList<MyPDFWord>(); 
   PDFDocument pdfDocument = new PDFDocument(pdfFileName); 
   pdfDocument.parsePages(PDFParsePagesOptions.OBJECTS.getValue(), 1, -1); 
 
   // Build the words for each page from PDFDocumentPage.Objects 
   for (PDFDocumentPage pdfPage : pdfDocument.getPages()) { 
      ArrayList<MyPDFWord> words = GetPageWords(pdfPage); 
      allWords.addAll(words); 
   } 
   pdfDocument = null; 
 
   // Now create a PDFRedact object for each word that contains the value we want 
   // to redact. Locale.ROOT keeps the case folding independent of the default 
   // locale of the machine running the example 
   String toRedactLower = toRedact.toLowerCase(java.util.Locale.ROOT); 
   ArrayList<PDFRedact> pdfRedacts = new ArrayList<PDFRedact>(); 
   for (MyPDFWord word : allWords) { 
      if (word.Value.toLowerCase(java.util.Locale.ROOT).contains(toRedactLower)) { 
         System.out.println("Found " + word.Value + " at " + word.Bounds + " in page " + word.PageNumber); 
         PDFRedact pdfRedact = new PDFRedact(); 
         pdfRedact.setPageNumber(word.PageNumber); 
         pdfRedact.setBounds(new PDFRect(word.Bounds.getLeft(), word.Bounds.getTop(), word.Bounds.getRight(), 
               word.Bounds.getBottom())); 
         pdfRedacts.add(pdfRedact); 
      } 
   } 
 
   // Redact the document (null password) 
   PDFFile.redact(pdfFileName, null, pdfRedacts); 
 
   // Finally, verify that the redacted PDF does not have the redacted words anymore 
   pdfDocument = new PDFDocument(pdfFileName); 
   pdfDocument.parsePages(PDFParsePagesOptions.OBJECTS.getValue(), 1, -1); 
 
   // Build the words for each page from PDFDocumentPage.Objects 
   for (PDFDocumentPage pdfPage : pdfDocument.getPages()) { 
      ArrayList<MyPDFWord> words = GetPageWords(pdfPage); 
 
      for (MyPDFWord word : words) { 
         assertFalse(word.Value.toLowerCase(java.util.Locale.ROOT).contains(toRedactLower)); 
      } 
   } 
   pdfDocument = null; 
} 
 
// Simple holder describing one word extracted from a PDF page 
class MyPDFWord { 
 
   // Number of the page the word was found on (copied from PDFDocumentPage.getPageNumber()) 
   public int PageNumber; 
 
   // The text of the word, built from the code of each text object that makes up the word 
   public String Value; 
 
   // Bounding rectangle of the word: the union of the bounds of its text objects. 
   // GetPageWords stores it with Y flipped (pageHeight - y), i.e. relative to a 
   // top-left origin rather than the bottom-left origin of the PDF object bounds 
   public LeadRectD Bounds; 
 
} 
 
// Groups the text objects of a parsed PDF page into words.
//
// pdfPage: a page whose objects have been parsed (PDFParsePagesOptions.OBJECTS).
// Returns: one MyPDFWord per word, holding the page number, the word text and the
//          union of the character rectangles with Y flipped to a top-left origin.
//          An empty list is returned when the page has no objects.
//
// A word ends at the first object flagged end-of-word or end-of-line, or at the last
// object of the page. Non-text objects are ignored: they contribute neither text nor bounds.
private static ArrayList<MyPDFWord> GetPageWords(PDFDocumentPage pdfPage) { 
   ArrayList<MyPDFWord> words = new ArrayList<MyPDFWord>(); 
 
   List<PDFObject> objects = pdfPage.getObjects(); 
   if (objects == null || objects.size() == 0) 
      return words; 
 
   int objectIndex = 0; 
   int objectCount = objects.size(); 
 
   double pageHeight = pdfPage.getHeight(); 
 
   // Loop through all the objects, consuming one word per iteration 
   while (objectIndex < objectCount) { 
      // Text and bounding rectangle are accumulated together so they always 
      // describe exactly the same set of text objects 
      LeadRectD wordBounds = LeadRectD.getEmpty(); 
      StringBuilder sb = new StringBuilder(); 
 
      // Loop till we reach end-of-word/end-of-line or reach the end of the objects 
      boolean more = true; 
      while (more) { 
         PDFObject pdfObject = objects.get(objectIndex); 
         // Is it text? 
         if (pdfObject.getObjectType() == PDFObjectType.TEXT) { 
            PDFRect pdfBounds = pdfObject.getBounds(); 
 
            // pdfBounds are in bottom-left coordinate, convert it to top-left 
            LeadRectD objectBounds = LeadRectD.fromLTRB(pdfBounds.getLeft(), 
                  pageHeight - pdfBounds.getTop(), pdfBounds.getRight(), 
                  pageHeight - pdfBounds.getBottom()); 
 
            // Add the bounding rectangle of this object 
            if (wordBounds.isEmpty()) 
               wordBounds = objectBounds; 
            else 
               wordBounds = LeadRectD.unionRects(wordBounds, objectBounds); 
 
            // Add the character of this object 
            sb.append(pdfObject.getCode()); 
         } 
 
         objectIndex++; 
         more = (objectIndex < objectCount) && !pdfObject.getTextProperties().isEndOfWord() 
               && !pdfObject.getTextProperties().isEndOfLine(); 
      } 
 
      // Only non-text objects were consumed: nothing to report 
      if (sb.length() == 0) { 
         continue; 
      } 
 
      // MyPDFWord is created through an instance of PDFFilesExamples 
      // (inner-class creation syntax), because this method is static 
      PDFFilesExamples ex = new PDFFilesExamples(); 
      MyPDFWord word = ex.new MyPDFWord(); 
      word.PageNumber = pdfPage.getPageNumber(); 
      word.Value = sb.toString(); 
      word.Bounds = wordBounds; 
      words.add(word); 
   } 
   return words; 
}

Requirements

Target Platforms

Reference

PDFFile Class

PDFFile Members

Leadtools.Pdf Namespace

Download our FREE evaluation

Help Version 23.0.2024.2.29

Leadtools.Pdf Assembly

Introduction

Getting Started

Namespaces

Leadtools.Pdf Namespace

Assemblies